# package
library(ape)
## Warning: package 'ape' was built under R version 3.5.2
library(clusterProfiler)
## 
## clusterProfiler v3.10.1  For help: https://guangchuangyu.github.io/software/clusterProfiler
## 
## If you use clusterProfiler in published research, please cite:
## Guangchuang Yu, Li-Gen Wang, Yanyan Han, Qing-Yu He. clusterProfiler: an R package for comparing biological themes among gene clusters. OMICS: A Journal of Integrative Biology. 2012, 16(5):284-287.
#bring in phase1 allele frequencies
phase1.all.freqs<-read.csv(file = "~/Desktop/anoph.3.march.2020/phase1.all.freqs.csv")
#make column for id
phase1.all.freqs$id<-paste(phase1.all.freqs$chrom,phase1.all.freqs$POS)
#bring in dataframe with locality and environmental data for each individual phase1
phase1.alldata<-read.csv(file = "~/Desktop/anoph.3.march.2020/phase1.allvariables.csv")
#subset to only the variables we need (lat,long,hum,temp,precip)
meta<-phase1.alldata[,c(16,17,18,20,22)]
#subset only the unique lat/longs
metapops<-unique(meta)
#sort by latitude to match the order of the allele frequency files
sort.pops.phase1 <-metapops[order(metapops$latitude),]

## read in lfmm outlier table
lfmm.outlier.table <- read.csv("~/Downloads/vetted.lfmm.temp.env.csv")[,2:3]
#add separate chrom and pos columns
lfmm.outlier.table$chrom<-data.frame(do.call('rbind', strsplit(as.character(lfmm.outlier.table$id),' ',fixed=TRUE)))[,1]
lfmm.outlier.table$pos<-data.frame(do.call('rbind', strsplit(as.character(lfmm.outlier.table$id),' ',fixed=TRUE)))[,2]

## read in bayescan outlier table
bayescan.outlier.table <- read.csv("~/Downloads/vetted.bayescenv.temp.env.csv")[,2:3]
#add separate chrom and pos columns
bayescan.outlier.table$chrom<-data.frame(do.call('rbind', strsplit(as.character(bayescan.outlier.table$id),' ',fixed=TRUE)))[,1]
bayescan.outlier.table$pos<-data.frame(do.call('rbind', strsplit(as.character(bayescan.outlier.table$id),' ',fixed=TRUE)))[,2]

## read in genomic regions and attributes file
genomic_location <- read.gff("~/Downloads/Anopheles_gambiae.AgamP4.47.chr.gff3.gz")
#keep only regions identified as "gene"
gen_regions <- genomic_location[genomic_location$type == "gene", ] # subset to only gene regions

#pull all genes queried
all.gene.id<-data.frame(do.call('rbind', strsplit(as.character(gen_regions$attributes),';',fixed=TRUE)))[,1]
## Warning in rbind(c("ID=gene:AGAP004677", "biotype=protein_coding",
## "description=methylenetetrahydrofolate dehydrogenase(NAD+) / 5%2C10-
## methenyltetrahydrofolate [Source:VB Community Annotation]", : number of columns
## of result is not a multiple of vector length (arg 1)
all.genes<-data.frame(do.call('rbind', strsplit(as.character(all.gene.id),':',fixed=TRUE)))[,2]
#write.table(all.genes, "~/Downloads/all.genes.anopheles.gambiae.txt", row.names = F, col.names = F, quote = F)
#define chroms
chroms <- c("2R", "2L", "3R", "3L","X") # chromosomes of interest

#initialize gene.list for lfmm
lfmm.gene.list<-data.frame()
#fill gene.list by matching location of genes of interest in gene feature regions by chromosomes
for (i in 1:length(chroms)){
  genloc <- gen_regions[gen_regions$seqid == chroms[i], ] # genome info per chromosome
  pos <- as.numeric(as.character(lfmm.outlier.table[lfmm.outlier.table$chrom == chroms[i],4])) # outlierFST position per chromosome
  for (w in 1:length(pos)) {
    if (nrow(genloc[pos[w] >= genloc$start & pos[w] <= genloc$end,]) !=0){
    lfmm.gene.list<-rbind(lfmm.gene.list, cbind(genloc[pos[w] >= genloc$start & pos[w] <= genloc$end,], pos[w]))
    }
  }
  cat("chromosome", i, "of", length(chroms), "finished\n\n")
}
## chromosome 1 of 5 finished
## 
## chromosome 2 of 5 finished
## 
## chromosome 3 of 5 finished
## 
## chromosome 4 of 5 finished
## 
## chromosome 5 of 5 finished
#pull out gene ID as its own column
lfmm.gene.list$gene.id<-data.frame(do.call('rbind', strsplit(as.character(lfmm.gene.list$attributes),';',fixed=TRUE)))[,1]
## Warning in rbind(c("ID=gene:AGAP002336", "biotype=protein_coding",
## "gene_id=AGAP002336", : number of columns of result is not a multiple of vector
## length (arg 1)
lfmm.gene.list$gene<-data.frame(do.call('rbind', strsplit(as.character(lfmm.gene.list$gene.id),':',fixed=TRUE)))[,2]

#make SNP ID column
lfmm.gene.list$SNPid<-paste(lfmm.gene.list$seqid,lfmm.gene.list$`pos[w]`)

#subset
lfmm.gene.list<-lfmm.gene.list[,c("SNPid","gene")]

#number of candidate snps
nrow(lfmm.outlier.table)
## [1] 2980
#calculate percentage of lfmm candidate SNPs that were in a gene
nrow(lfmm.gene.list)/nrow(lfmm.outlier.table)
## [1] 0.5325503
#calc % of the genome covered by the genes we are searching
sum(gen_regions$end-gen_regions$start)/278268413 
## [1] 0.3935553
table(lfmm.gene.list$gene)[table(lfmm.gene.list$gene) >10]
## 
## AGAP000519 AGAP000801 AGAP000815 AGAP000821 AGAP002858 AGAP002859 AGAP003997 
##         40         19         19         11         23         61         13 
## AGAP004692 AGAP004707 AGAP004728 AGAP004731 AGAP005073 AGAP005165 AGAP005595 
##         11         15         12         11         14         13         11 
## AGAP009189 AGAP009201 AGAP009805 AGAP010184 AGAP010283 AGAP010295 AGAP010302 
##         11         15         13         41         14         32         18 
## AGAP010303 AGAP010310 AGAP010311 AGAP010312 AGAP010313 AGAP010314 AGAP010750 
##         11         29         21         40         17         34         23 
## AGAP029238 AGAP029563 AGAP029589 
##         15         31         15
hist(table(lfmm.gene.list$gene))

#write.table(unique(lfmm.gene.list$gene), "~/Downloads/lfmm.temp.gene.list.txt", row.names = F, col.names = F, quote = F)
#write.table(lfmm.gene.list, "~/Downloads/lfmm.temp.gene.snp.match.txt", row.names = F, col.names = T, quote = F)
#initialize gene.list for bayescan
bayescan.gene.list<-data.frame()
#fill gene.list
for (i in 1:length(chroms)){
  genloc <- gen_regions[gen_regions$seqid == chroms[i], ] # genome info per chromosome
  pos <- as.numeric(as.character(bayescan.outlier.table[bayescan.outlier.table$chrom == chroms[i],4])) # outlierFST position per chromosome
  for (w in 1:length(pos)) {
    if (nrow(genloc[pos[w] >= genloc$start & pos[w] <= genloc$end,]) !=0){
      bayescan.gene.list<-rbind(bayescan.gene.list, cbind(genloc[pos[w] >= genloc$start & pos[w] <= genloc$end,], pos[w]))
    }
  }
  cat("chromosome", i, "of", length(chroms), "finished\n\n")
}
## chromosome 1 of 5 finished
## 
## chromosome 2 of 5 finished
## 
## chromosome 3 of 5 finished
## 
## chromosome 4 of 5 finished
## 
## chromosome 5 of 5 finished
#pull out gene ID as its own column
bayescan.gene.list$gene.id<-data.frame(do.call('rbind', strsplit(as.character(bayescan.gene.list$attributes),';',fixed=TRUE)))[,1]
## Warning in rbind(c("ID=gene:AGAP002444", "Name=GPROP12",
## "biotype=protein_coding", : number of columns of result is not a multiple of
## vector length (arg 3)
bayescan.gene.list$gene<-data.frame(do.call('rbind', strsplit(as.character(bayescan.gene.list$gene.id),':',fixed=TRUE)))[,2]

#make SNP ID column
bayescan.gene.list$SNPid<-paste(bayescan.gene.list$seqid,bayescan.gene.list$`pos[w]`)

#subset
bayescan.gene.list<-bayescan.gene.list[,c("SNPid","gene")]

#number of candidate snps
nrow(bayescan.outlier.table)
## [1] 6481
#calculate percentage of bayescan candidate SNPs that were in a gene
nrow(bayescan.gene.list)/nrow(bayescan.outlier.table)
## [1] 0.4912822
#calc % of the genome covered by the genes we are searching
sum(gen_regions$end-gen_regions$start)/278268413 
## [1] 0.3935553
table(bayescan.gene.list$gene)[table(bayescan.gene.list$gene) >10]
## 
## AGAP000519 AGAP000812 AGAP000815 AGAP000821 AGAP000829 AGAP000834 AGAP000841 
##         22         25         23         45         14         12         15 
## AGAP000844 AGAP000847 AGAP000848 AGAP000854 AGAP000915 AGAP000929 AGAP000940 
##         11         57         18         14         66         17         41 
## AGAP000962 AGAP000974 AGAP000998 AGAP001004 AGAP001015 AGAP001022 AGAP001052 
##         29         17         45         12         19         17         23 
## AGAP002858 AGAP002859 AGAP002881 AGAP002883 AGAP004761 AGAP004766 AGAP004767 
##         67         64         43         12         11         12         17 
## AGAP004772 AGAP004775 AGAP004783 AGAP004795 AGAP004805 AGAP004835 AGAP004846 
##         12         11         11         14         24         20         19 
## AGAP004873 AGAP007736 AGAP009183 AGAP009189 AGAP009201 AGAP009793 AGAP009860 
##         24         56         13         16         34         23         18 
## AGAP009932 AGAP009934 AGAP010184 AGAP010186 AGAP010335 AGAP010378 AGAP010473 
##         12         14         61         31         13         13         22 
## AGAP010503 AGAP010599 AGAP010794 AGAP013143 AGAP028655 AGAP029105 AGAP029113 
##         14         41         12         11         13         14         13 
## AGAP029238 AGAP029471 AGAP029488 AGAP029511 AGAP029589 
##         11         20         25         25         31
hist(table(bayescan.gene.list$gene))

#write.table(unique(bayescan.gene.list$gene), "~/Downloads/bayescan.temp.gene.list.txt", row.names = F, col.names = F, quote = F)
#write.table(bayescan.gene.list, "~/Downloads/bayescan.temp.gene.snp.match.txt", row.names = F, col.names = T, quote = F)
#look into how many genes overlap between the methods
intersect(unique(lfmm.gene.list$gene),unique(bayescan.gene.list$gene))
##   [1] "AGAP002444" "AGAP002449" "AGAP002817" "AGAP002824" "AGAP002832"
##   [6] "AGAP002835" "AGAP002845" "AGAP002846" "AGAP002857" "AGAP002858"
##  [11] "AGAP002859" "AGAP002869" "AGAP000586" "AGAP029190" "AGAP002874"
##  [16] "AGAP002875" "AGAP002876" "AGAP002879" "AGAP002880" "AGAP002881"
##  [21] "AGAP002883" "AGAP002886" "AGAP002888" "AGAP002892" "AGAP003623"
##  [26] "AGAP003632" "AGAP003633" "AGAP003643" "AGAP003651" "AGAP003654"
##  [31] "AGAP003656" "AGAP003658" "AGAP004644" "AGAP013143" "AGAP004671"
##  [36] "AGAP004672" "AGAP004689" "AGAP004691" "AGAP004692" "AGAP004693"
##  [41] "AGAP004694" "AGAP004677" "AGAP028434" "AGAP029368" "AGAP004696"
##  [46] "AGAP029667" "AGAP028436" "AGAP028437" "AGAP004707" "AGAP004714"
##  [51] "AGAP004718" "AGAP004723" "AGAP004725" "AGAP004726" "AGAP004727"
##  [56] "AGAP004728" "AGAP004729" "AGAP029478" "AGAP004731" "AGAP029113"
##  [61] "AGAP004742" "AGAP004746" "AGAP004756" "AGAP004758" "AGAP004761"
##  [66] "AGAP004766" "AGAP004767" "AGAP004768" "AGAP004769" "AGAP004772"
##  [71] "AGAP004775" "AGAP004781" "AGAP004783" "AGAP004792" "AGAP004795"
##  [76] "AGAP004797" "AGAP004800" "AGAP004801" "AGAP004805" "AGAP004808"
##  [81] "AGAP004817" "AGAP004819" "AGAP004823" "AGAP004824" "AGAP004825"
##  [86] "AGAP004834" "AGAP004682" "AGAP004845" "AGAP004847" "AGAP004861"
##  [91] "AGAP004866" "AGAP029807" "AGAP004871" "AGAP004873" "AGAP004877"
##  [96] "AGAP004881" "AGAP004890" "AGAP004892" "AGAP028432" "AGAP009183"
## [101] "AGAP009189" "AGAP009191" "AGAP009195" "AGAP009196" "AGAP009198"
## [106] "AGAP009200" "AGAP009201" "AGAP009202" "AGAP029480" "AGAP009206"
## [111] "AGAP029511" "AGAP009210" "AGAP009212" "AGAP009213" "AGAP009224"
## [116] "AGAP009245" "AGAP009389" "AGAP029641" "AGAP009708" "AGAP009723"
## [121] "AGAP029589" "AGAP009744" "AGAP009757" "AGAP009770" "AGAP009784"
## [126] "AGAP009789" "AGAP009793" "AGAP029125" "AGAP029567" "AGAP009799"
## [131] "AGAP029238" "AGAP009805" "AGAP009809" "AGAP029648" "AGAP009860"
## [136] "AGAP009896" "AGAP009925" "AGAP009947" "AGAP010003" "AGAP010026"
## [141] "AGAP010090" "AGAP010147" "AGAP010158" "AGAP010160" "AGAP010162"
## [146] "AGAP010184" "AGAP010186" "AGAP010229" "AGAP010265" "AGAP010283"
## [151] "AGAP010286" "AGAP010290" "AGAP010293" "AGAP010294" "AGAP010295"
## [156] "AGAP010297" "AGAP010302" "AGAP010303" "AGAP010304" "AGAP029542"
## [161] "AGAP008173" "AGAP007736" "AGAP010330" "AGAP029651" "AGAP010782"
## [166] "AGAP010793" "AGAP010794" "AGAP029620" "AGAP029564" "AGAP010311"
## [171] "AGAP010990" "AGAP010312" "AGAP010394" "AGAP010410" "AGAP010313"
## [176] "AGAP029616" "AGAP010509" "AGAP010310" "AGAP010598" "AGAP010599"
## [181] "AGAP029596" "AGAP010691" "AGAP010740" "AGAP029636" "AGAP029471"
## [186] "AGAP010750" "AGAP013356" "AGAP000801" "AGAP000807" "AGAP000812"
## [191] "AGAP000814" "AGAP000815" "AGAP000821" "AGAP000823" "AGAP000825"
## [196] "AGAP000826" "AGAP000829" "AGAP000832" "AGAP000833" "AGAP000834"
## [201] "AGAP000835" "AGAP028655" "AGAP013506" "AGAP000840" "AGAP000841"
## [206] "AGAP000844" "AGAP000847" "AGAP000854" "AGAP029672" "AGAP000861"
## [211] "AGAP000862" "AGAP000863" "AGAP000894" "AGAP000901" "AGAP000929"
## [216] "AGAP012964" "AGAP000958" "AGAP000965" "AGAP000974" "AGAP000998"
## [221] "AGAP001015" "AGAP000413" "AGAP000519"

LFMM overrep testing

#overrepresentation testing
#read in each GO term to gene map (col1=geneID,col2=GOterm)
mart.export<-read.table("~/Downloads/mart_export (3).txt", sep = "\t", header = T)

#calculate overrepresentation for lfmm temp
lfmm.temp.enriched<-enricher(gene = lfmm.gene.list$gene,
                 universe = all.genes,
                   pAdjustMethod = "fdr",
                 TERM2GENE= mart.export[,c(2,1)]
)
result<-lfmm.temp.enriched@result

lfmm.temp.enriched.sig<-result[result$p.adjust <= .05,]
lfmm.temp.enriched.sig
#write.table(lfmm.temp.enriched.sig[,c("ID","geneID")], file="~/Downloads/lfmm.temp.go.overrep.txt",
#            row.names = F, col.names = F, quote = F)

Bayescenv overrep testing

#calculate overrepresentation for bayescan temp
bayescan.temp.enriched<-enricher(gene = bayescan.gene.list$gene,
                            universe = all.genes,
                            pAdjustMethod = "fdr",
                            TERM2GENE= mart.export[,c(2,1)]
                            )
result<-bayescan.temp.enriched@result

#retain significantly enriched GO terms
bayescan.temp.enriched.sig<-result[result$p.adjust <= .05,]

#check out significantly overrepresented GO terms
bayescan.temp.enriched.sig
#get a vector of genes we found SNPs in for each enriched GO term
#lfmm
go.l.0004930<-unlist(strsplit(as.character(lfmm.temp.enriched.sig$geneID[lfmm.temp.enriched.sig$ID == "GO:0004930"]),'/'))
go.l.0007186<-unlist(strsplit(as.character(lfmm.temp.enriched.sig$geneID[lfmm.temp.enriched.sig$ID == "GO:0007186"]),'/'))
#bayescan
go.b.0004930<-unlist(strsplit(as.character(bayescan.temp.enriched.sig$geneID[bayescan.temp.enriched.sig$ID == "GO:0004930"]),'/'))
go.0016055<-unlist(strsplit(as.character(bayescan.temp.enriched.sig$geneID[bayescan.temp.enriched.sig$ID == "GO:0016055"]),'/'))
go.0007275<-unlist(strsplit(as.character(bayescan.temp.enriched.sig$geneID[bayescan.temp.enriched.sig$ID == "GO:0007275"]),'/'))
go.b.0007186<-unlist(strsplit(as.character(bayescan.temp.enriched.sig$geneID[bayescan.temp.enriched.sig$ID == "GO:0007186"]),'/'))
go.0005216<-unlist(strsplit(as.character(bayescan.temp.enriched.sig$geneID[bayescan.temp.enriched.sig$ID == "GO:0005216"]),'/'))
go.0005887<-unlist(strsplit(as.character(bayescan.temp.enriched.sig$geneID[bayescan.temp.enriched.sig$ID == "GO:0005887"]),'/'))
go.0006813<-unlist(strsplit(as.character(bayescan.temp.enriched.sig$geneID[bayescan.temp.enriched.sig$ID == "GO:0006813"]),'/'))
go.0055085<-unlist(strsplit(as.character(bayescan.temp.enriched.sig$geneID[bayescan.temp.enriched.sig$ID == "GO:0055085"]),'/'))
go.0005249<-unlist(strsplit(as.character(bayescan.temp.enriched.sig$geneID[bayescan.temp.enriched.sig$ID == "GO:0005249"]),'/'))
go.0006811<-unlist(strsplit(as.character(bayescan.temp.enriched.sig$geneID[bayescan.temp.enriched.sig$ID == "GO:0006811"]),'/'))

plot all SNPs associated w/ GO:0004930

lfmm.gene.list.go.0004930<-lfmm.gene.list[lfmm.gene.list$gene %in% go.l.0004930,]
#plot all SNPs associated w/ GO:0004930
sig.pos<-lfmm.gene.list.go.0004930$SNPid
gene<-droplevels(lfmm.gene.list.go.0004930$gene)
#plot allele freqs vs humidity for these SNPs
par(mfrow=c(3,3))
for (i in 1:length(sig.pos)){
  frq<-as.numeric(as.vector(phase1.all.freqs[phase1.all.freqs$id == sig.pos[i],3:16]))
  plot(sort.pops.phase1$hannual,frq, main = gene[i], ylim = c(0,1), xlab=sig.pos[i])
  abline(lm(frq~sort.pops.phase1$hannual))
}

plot all SNPs associated w/ GO:0007186

lfmm.gene.list.go.0007186<-lfmm.gene.list[lfmm.gene.list$gene %in% go.l.0007186,]
#plot all SNPs associated w/ GO:0007186
sig.pos<-lfmm.gene.list.go.0007186$SNPid
gene<-droplevels(lfmm.gene.list.go.0007186$gene)
#plot allele freqs vs humidity for these SNPs
par(mfrow=c(3,3))
for (i in 1:length(sig.pos)){
  frq<-as.numeric(as.vector(phase1.all.freqs[phase1.all.freqs$id == sig.pos[i],3:16]))
  plot(sort.pops.phase1$hannual,frq, main = gene[i], ylim = c(0,1), xlab=sig.pos[i])
  abline(lm(frq~sort.pops.phase1$hannual))
}

plot all SNPs associated w/ GO:0004930

bayescan.gene.list.go.0004930<-bayescan.gene.list[bayescan.gene.list$gene %in% go.b.0004930,]
#plot all SNPs associated w/ GO:0004930
sig.pos<-bayescan.gene.list.go.0004930$SNPid
gene<-droplevels(bayescan.gene.list.go.0004930$gene)
#plot allele freqs vs humidity for these SNPs
par(mfrow=c(3,3))
for (i in 1:length(sig.pos)){
  frq<-as.numeric(as.vector(phase1.all.freqs[phase1.all.freqs$id == sig.pos[i],3:16]))
  plot(sort.pops.phase1$hannual,frq, main = gene[i], ylim = c(0,1), xlab=sig.pos[i])
  abline(lm(frq~sort.pops.phase1$hannual))
}

plot all SNPs associated w/ GO:0016055

bayescan.gene.list.go.0016055<-bayescan.gene.list[bayescan.gene.list$gene %in% go.0016055,]
#plot all SNPs associated w/ GO:0016055
sig.pos<-bayescan.gene.list.go.0016055$SNPid
gene<-droplevels(bayescan.gene.list.go.0016055$gene)
#plot allele freqs vs humidity for these SNPs
par(mfrow=c(3,3))
for (i in 1:length(sig.pos)){
  frq<-as.numeric(as.vector(phase1.all.freqs[phase1.all.freqs$id == sig.pos[i],3:16]))
  plot(sort.pops.phase1$hannual,frq, main = gene[i], ylim = c(0,1), xlab=sig.pos[i])
  abline(lm(frq~sort.pops.phase1$hannual))
}

plot all SNPs associated w/ GO:0007275

bayescan.gene.list.go.0007275<-bayescan.gene.list[bayescan.gene.list$gene %in% go.0007275,]
#plot all SNPs associated w/ GO:0007275
sig.pos<-bayescan.gene.list.go.0007275$SNPid
gene<-droplevels(bayescan.gene.list.go.0007275$gene)
#plot allele freqs vs humidity for these SNPs
par(mfrow=c(3,3))
for (i in 1:length(sig.pos)){
  frq<-as.numeric(as.vector(phase1.all.freqs[phase1.all.freqs$id == sig.pos[i],3:16]))
  plot(sort.pops.phase1$hannual,frq, main = gene[i], ylim = c(0,1), xlab=sig.pos[i])
  abline(lm(frq~sort.pops.phase1$hannual))
}

plot all SNPs associated w/ GO:0007186

bayescan.gene.list.go.0007186<-bayescan.gene.list[bayescan.gene.list$gene %in% go.b.0007186,]
#plot all SNPs associated w/ GO:0007186
sig.pos<-bayescan.gene.list.go.0007186$SNPid
gene<-droplevels(bayescan.gene.list.go.0007186$gene)
#plot allele freqs vs humidity for these SNPs
par(mfrow=c(3,3))
for (i in 1:length(sig.pos)){
  frq<-as.numeric(as.vector(phase1.all.freqs[phase1.all.freqs$id == sig.pos[i],3:16]))
  plot(sort.pops.phase1$hannual,frq, main = gene[i], ylim = c(0,1), xlab=sig.pos[i])
  abline(lm(frq~sort.pops.phase1$hannual))
}

plot all SNPs associated w/ GO:0005216

bayescan.gene.list.go.0005216<-bayescan.gene.list[bayescan.gene.list$gene %in% go.0005216,]
#plot all SNPs associated w/ GO:0005216
sig.pos<-bayescan.gene.list.go.0005216$SNPid
gene<-droplevels(bayescan.gene.list.go.0005216$gene)
#plot allele freqs vs humidity for these SNPs
par(mfrow=c(3,3))
for (i in 1:length(sig.pos)){
  frq<-as.numeric(as.vector(phase1.all.freqs[phase1.all.freqs$id == sig.pos[i],3:16]))
  plot(sort.pops.phase1$hannual,frq, main = gene[i], ylim = c(0,1), xlab=sig.pos[i])
  abline(lm(frq~sort.pops.phase1$hannual))
}

plot all SNPs associated w/ GO:0005887

bayescan.gene.list.go.0005887<-bayescan.gene.list[bayescan.gene.list$gene %in% go.0005887,]
#plot all SNPs associated w/ GO:0005887
sig.pos<-bayescan.gene.list.go.0005887$SNPid
gene<-droplevels(bayescan.gene.list.go.0005887$gene)
#plot allele freqs vs humidity for these SNPs
par(mfrow=c(3,3))
for (i in 1:length(sig.pos)){
  frq<-as.numeric(as.vector(phase1.all.freqs[phase1.all.freqs$id == sig.pos[i],3:16]))
  plot(sort.pops.phase1$hannual,frq, main = gene[i], ylim = c(0,1), xlab=sig.pos[i])
  abline(lm(frq~sort.pops.phase1$hannual))
}

plot all SNPs associated w/ GO:0006813

bayescan.gene.list.go.0006813<-bayescan.gene.list[bayescan.gene.list$gene %in% go.0006813,]
#plot all SNPs associated w/ GO:0006813
sig.pos<-bayescan.gene.list.go.0006813$SNPid
gene<-droplevels(bayescan.gene.list.go.0006813$gene)
#plot allele freqs vs humidity for these SNPs
par(mfrow=c(3,3))
for (i in 1:length(sig.pos)){
  frq<-as.numeric(as.vector(phase1.all.freqs[phase1.all.freqs$id == sig.pos[i],3:16]))
  plot(sort.pops.phase1$hannual,frq, main = gene[i], ylim = c(0,1), xlab=sig.pos[i])
  abline(lm(frq~sort.pops.phase1$hannual))
}

plot all SNPs associated w/ GO:0055085

bayescan.gene.list.go.0055085<-bayescan.gene.list[bayescan.gene.list$gene %in% go.0055085,]
#plot all SNPs associated w/ GO:0055085
sig.pos<-bayescan.gene.list.go.0055085$SNPid
gene<-droplevels(bayescan.gene.list.go.0055085$gene)
#plot allele freqs vs humidity for these SNPs
par(mfrow=c(3,3))
for (i in 1:length(sig.pos)){
  frq<-as.numeric(as.vector(phase1.all.freqs[phase1.all.freqs$id == sig.pos[i],3:16]))
  plot(sort.pops.phase1$hannual,frq, main = gene[i], ylim = c(0,1), xlab=sig.pos[i])
  abline(lm(frq~sort.pops.phase1$hannual))
}

plot all SNPs associated w/ GO:0005249

bayescan.gene.list.go.0005249<-bayescan.gene.list[bayescan.gene.list$gene %in% go.0005249,]
#plot all SNPs associated w/ GO:0005249
sig.pos<-bayescan.gene.list.go.0005249$SNPid
gene<-droplevels(bayescan.gene.list.go.0005249$gene)
#plot allele freqs vs humidity for these SNPs
par(mfrow=c(3,3))
for (i in 1:length(sig.pos)){
  frq<-as.numeric(as.vector(phase1.all.freqs[phase1.all.freqs$id == sig.pos[i],3:16]))
  plot(sort.pops.phase1$hannual,frq, main = gene[i], ylim = c(0,1), xlab=sig.pos[i])
  abline(lm(frq~sort.pops.phase1$hannual))
}

plot all SNPs associated w/ GO:0006811

bayescan.gene.list.go.0006811<-bayescan.gene.list[bayescan.gene.list$gene %in% go.0006811,]
#plot all SNPs associated w/ GO:0006811
sig.pos<-bayescan.gene.list.go.0006811$SNPid
gene<-droplevels(bayescan.gene.list.go.0006811$gene)
#plot allele freqs vs humidity for these SNPs
par(mfrow=c(3,3))
for (i in 1:length(sig.pos)){
  frq<-as.numeric(as.vector(phase1.all.freqs[phase1.all.freqs$id == sig.pos[i],3:16]))
  plot(sort.pops.phase1$hannual,frq, main = gene[i], ylim = c(0,1), xlab=sig.pos[i])
  abline(lm(frq~sort.pops.phase1$hannual))
}

Best places to find info:

uniprot database:

uniprot.org/

UC Irvine AGAM Gene Expression Profile

http://www.angagepuci.bio.uci.edu/

AG1000G selection atlas

https://malariagen.github.io/agam-selection-atlas/0.1-alpha3/index.html